@article{lee-etal-2022-annotation,
title = "Annotation Curricula to Implicitly Train Non-Expert Annotators",
author = "Lee, Ji-Ung and
Klie, Jan-Christoph and
Gurevych, Iryna",
journal = "Computational Linguistics",
volume = "48",
number = "2",
month = jun,
year = "2022",
address = "Cambridge, MA",
publisher = "MIT Press",
url = "https://aclanthology.org/2022.cl-2.4",
doi = "10.1162/coli_a_00436",
pages = "343--373",
abstract = "Annotation studies often require annotators to familiarize themselves with the task, its annotation scheme, and the data domain. This can be overwhelming in the beginning, mentally taxing, and induce errors into the resulting annotations; especially in citizen science or crowdsourcing scenarios where domain expertise is not required. To alleviate these issues, this work proposes annotation curricula, a novel approach to implicitly train annotators. The goal is to gradually introduce annotators into the task by ordering instances to be annotated according to a learning curriculum. To do so, this work formalizes annotation curricula for sentence- and paragraph-level annotation tasks, defines an ordering strategy, and identifies well-performing heuristics and interactively trained models on three existing English datasets. Finally, we provide a proof of concept for annotation curricula in a carefully designed user study with 40 voluntary participants who are asked to identify the most fitting misconception for English tweets about the Covid-19 pandemic. The results indicate that using a simple heuristic to order instances can already significantly reduce the total annotation time while preserving a high annotation quality. Annotation curricula thus can be a promising research direction to improve data collection. To facilitate future research{---}for instance, to adapt annotation curricula to specific tasks and expert annotation scenarios{---}all code and data from the user study consisting of 2,400 annotations is made available.1",
}
<?xml version="1.0" encoding="UTF-8"?>
<modsCollection xmlns="http://www.loc.gov/mods/v3">
<mods ID="lee-etal-2022-annotation">
<titleInfo>
<title>Annotation Curricula to Implicitly Train Non-Expert Annotators</title>
</titleInfo>
<name type="personal">
<namePart type="given">Ji-Ung</namePart>
<namePart type="family">Lee</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Jan-Christoph</namePart>
<namePart type="family">Klie</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<name type="personal">
<namePart type="given">Iryna</namePart>
<namePart type="family">Gurevych</namePart>
<role>
<roleTerm authority="marcrelator" type="text">author</roleTerm>
</role>
</name>
<originInfo>
<dateIssued>2022-06</dateIssued>
</originInfo>
<typeOfResource>text</typeOfResource>
<genre authority="bibutilsgt">journal article</genre>
<relatedItem type="host">
<titleInfo>
<title>Computational Linguistics</title>
</titleInfo>
<originInfo>
<issuance>continuing</issuance>
<publisher>MIT Press</publisher>
<place>
<placeTerm type="text">Cambridge, MA</placeTerm>
</place>
</originInfo>
<genre authority="marcgt">periodical</genre>
<genre authority="bibutilsgt">academic journal</genre>
</relatedItem>
<abstract>Annotation studies often require annotators to familiarize themselves with the task, its annotation scheme, and the data domain. This can be overwhelming in the beginning, mentally taxing, and induce errors into the resulting annotations; especially in citizen science or crowdsourcing scenarios where domain expertise is not required. To alleviate these issues, this work proposes annotation curricula, a novel approach to implicitly train annotators. The goal is to gradually introduce annotators into the task by ordering instances to be annotated according to a learning curriculum. To do so, this work formalizes annotation curricula for sentence- and paragraph-level annotation tasks, defines an ordering strategy, and identifies well-performing heuristics and interactively trained models on three existing English datasets. Finally, we provide a proof of concept for annotation curricula in a carefully designed user study with 40 voluntary participants who are asked to identify the most fitting misconception for English tweets about the Covid-19 pandemic. The results indicate that using a simple heuristic to order instances can already significantly reduce the total annotation time while preserving a high annotation quality. Annotation curricula thus can be a promising research direction to improve data collection. To facilitate future research—for instance, to adapt annotation curricula to specific tasks and expert annotation scenarios—all code and data from the user study consisting of 2,400 annotations is made available.1</abstract>
<identifier type="citekey">lee-etal-2022-annotation</identifier>
<identifier type="doi">10.1162/coli_a_00436</identifier>
<location>
<url>https://aclanthology.org/2022.cl-2.4</url>
</location>
<part>
<date>2022-06</date>
<detail type="volume"><number>48</number></detail>
<detail type="issue"><number>2</number></detail>
<extent unit="page">
<start>343</start>
<end>373</end>
</extent>
</part>
</mods>
</modsCollection>
%0 Journal Article
%T Annotation Curricula to Implicitly Train Non-Expert Annotators
%A Lee, Ji-Ung
%A Klie, Jan-Christoph
%A Gurevych, Iryna
%J Computational Linguistics
%D 2022
%8 June
%V 48
%N 2
%I MIT Press
%C Cambridge, MA
%F lee-etal-2022-annotation
%X Annotation studies often require annotators to familiarize themselves with the task, its annotation scheme, and the data domain. This can be overwhelming in the beginning, mentally taxing, and induce errors into the resulting annotations; especially in citizen science or crowdsourcing scenarios where domain expertise is not required. To alleviate these issues, this work proposes annotation curricula, a novel approach to implicitly train annotators. The goal is to gradually introduce annotators into the task by ordering instances to be annotated according to a learning curriculum. To do so, this work formalizes annotation curricula for sentence- and paragraph-level annotation tasks, defines an ordering strategy, and identifies well-performing heuristics and interactively trained models on three existing English datasets. Finally, we provide a proof of concept for annotation curricula in a carefully designed user study with 40 voluntary participants who are asked to identify the most fitting misconception for English tweets about the Covid-19 pandemic. The results indicate that using a simple heuristic to order instances can already significantly reduce the total annotation time while preserving a high annotation quality. Annotation curricula thus can be a promising research direction to improve data collection. To facilitate future research—for instance, to adapt annotation curricula to specific tasks and expert annotation scenarios—all code and data from the user study consisting of 2,400 annotations is made available.
%R 10.1162/coli_a_00436
%U https://aclanthology.org/2022.cl-2.4
%U https://doi.org/10.1162/coli_a_00436
%P 343-373
Markdown (Informal)
[Annotation Curricula to Implicitly Train Non-Expert Annotators](https://aclanthology.org/2022.cl-2.4) (Lee et al., CL 2022)
ACL
Ji-Ung Lee, Jan-Christoph Klie, and Iryna Gurevych. 2022. Annotation Curricula to Implicitly Train Non-Expert Annotators. Computational Linguistics, 48(2):343–373.